1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.smartcrawler.common;
28
29 import java.net.MalformedURLException;
30 import java.net.URL;
31 import org.apache.log4j.Logger;
32
33
34 /***
35 * This object represents a simple html link.
36 *
37 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
38 * @version <tt>$Revision: 1.8 $</tt>
39 */
40 public class Link {
41
42 private static Logger log = SCLogger.getLogger(Link.class);
43
44 private String urlStr;
45 private URL url;
46
47 /***
48 * Constructor.
49 *
50 * @param urlStr The link string
51 * @throws org.smartcrawler.common.MalformedLinkException
52 *
53 */
54 public Link(String str) throws MalformedLinkException {
55
56 if (!str.toLowerCase().startsWith("http://")) {
57 str = "http://" + str;
58 }
59 try {
60 this.url = new URL(str);
61 } catch (MalformedURLException e) {
62 throw new MalformedLinkException("The link " + str
63 + " does not represents a valid URL");
64 }
65
66 this.urlStr = this.url.getProtocol() + "://"
67 + this.url.getHost()
68 + (this.url.getPath() == null ? "" : this.url.getPath())
69 + (this.url.getQuery() == null ? "" : "?" + this.url.getQuery());
70
71 }
72
73 /***
74 *
75 * @return
76 */
77 public URL getURL() {
78 return this.url;
79 }
80
81 /***
82 *
83 * @return
84 */
85 public String toString() {
86 return urlStr;
87 }
88
89 /***
90 *
91 * @param includeFile
92 * @return
93 */
94 public String getPath(boolean includeFile) {
95 String res = this.url.getPath();
96 if (!includeFile) {
97 int idx = res.lastIndexOf("/");
98 if (idx >= 0 ) {
99 String tmp = res.substring(idx);
100 if (tmp.indexOf(".") > 0) {
101 res = res.substring(0, idx);
102 }
103 }
104 if (res.endsWith("/")) {
105 res = res.substring(0, res.length() - 1);
106 }
107 }
108 log.debug("getPath: urlStr=" + urlStr + " res=" + res);
109 return res;
110 }
111
112 /***
113 *
114 * @return
115 */
116 public String getHost() {
117 return this.url.getHost();
118 }
119
120 /***
121 *
122 * @param objLink
123 * @return
124 */
125 public boolean equals(Object objLink) {
126 if (this == objLink) {
127 return true;
128 }
129 if(objLink instanceof Link) {
130 Link link = (Link)objLink;
131 return this.toString().equals(link.toString());
132 } else {
133 return false;
134 }
135 }
136
137 /***
138 *
139 * @return
140 */
141 public synchronized int hashCode() {
142 return this.toString().hashCode();
143 }
144 }